Global Healthcare Expenditure Analysis¶

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

DATA CLEANING¶

In [2]:
# Load the raw global health expenditure dataset.
# NOTE(review): absolute, machine-specific path - this cell only runs where this file exists.
raw_data_csv = "/Users/sahithirao/Desktop/Sem3/DataViz/Project/Data set/dei-global-health-expenditure-world_dataset_Data-dei-global-health-expenditure-world.csv"
data = pd.read_csv(raw_data_csv)
In [3]:
data.head(5)
Out[3]:
Unnamed: 0 country code region income year che_gdp che_pc_usd hk_gdp hk_g_gdp ... age1_gghed_ppp2020_pc age1_ext_ppp2020_pc age1_pvtd_ppp2020_pc hk_ppp2020_pc hk_gghed_ppp2020_pc hk_ext_ppp2020_pc hk_pvtd_ppp2020_pc gdp_ppp2020_pc pfc_ppp2020_pc gge_ppp2020_pc
0 0 Algeria DZA AFR Lower-middle 2000 3.489033 62.117695 NaN NaN ... NaN NaN NaN NaN NaN NaN NaN 9422.670850 3917.143779 2692.144010
1 1 Algeria DZA AFR Lower-middle 2001 3.837877 67.338501 NaN NaN ... NaN NaN NaN NaN NaN NaN NaN 9572.762464 4184.332806 2991.622448
2 2 Algeria DZA AFR Lower-middle 2002 3.730042 66.947601 NaN NaN ... NaN NaN NaN NaN NaN NaN NaN 9973.432108 4386.695961 3419.400063
3 3 Algeria DZA AFR Lower-middle 2003 3.601041 76.235474 NaN NaN ... NaN NaN NaN NaN NaN NaN NaN 10547.676227 4270.038642 3396.579475
4 4 Algeria DZA AFR Lower-middle 2004 3.544073 93.024330 NaN NaN ... NaN NaN NaN NaN NaN NaN NaN 10847.517657 4182.638819 3337.235958

5 rows × 3221 columns

In [4]:
#understanding the data
data.shape
Out[4]:
(4224, 3221)
In [5]:
#checking null values
data.isna().sum()
Out[5]:
Unnamed: 0               0
country                  0
code                     0
region                   0
income                   0
                      ... 
hk_ext_ppp2020_pc     3540
hk_pvtd_ppp2020_pc    3881
gdp_ppp2020_pc         218
pfc_ppp2020_pc         218
gge_ppp2020_pc         223
Length: 3221, dtype: int64
In [6]:
# Keep only columns that have at least MIN_NON_NULL non-null values. With the 4224 rows
# reported by data.shape, this drops columns that are more than ~29% null.
MIN_NON_NULL = 3000
data_cleaned = data.dropna(axis="columns", thresh=MIN_NON_NULL)
In [7]:
data_cleaned.shape
Out[7]:
(4224, 415)
In [8]:
# Drop every row that still has a missing value in any of the remaining columns.
data_cleaned = data_cleaned.dropna(axis="index", how="any")
In [9]:
#now the rows and columns went down to 2153 and 415 respectively
data_cleaned.shape
Out[9]:
(2153, 415)
In [10]:
data_cleaned
Out[10]:
Unnamed: 0 country code region income year che_gdp che_pc_usd che gghed ... hf121_ppp2020_pc hf122_ppp2020_pc hf13_ppp2020_pc hf2_ppp2020_pc hf21_ppp2020_pc hf22_ppp2020_pc hf3_ppp2020_pc gdp_ppp2020_pc pfc_ppp2020_pc gge_ppp2020_pc
0 0 Algeria DZA AFR Lower-middle 2000 3.489033 62.117695 143870.265625 103533.985000 ... 85.652478 0.0 0.0 7.359546 2.689580 0.182809 84.802063 9422.670850 3917.143779 2692.144010
1 1 Algeria DZA AFR Lower-middle 2001 3.837877 67.338501 162230.890625 123663.777000 ... 94.641082 0.0 0.0 7.720256 3.113848 0.192492 79.608176 9572.762464 4184.332806 2991.622448
2 2 Algeria DZA AFR Lower-middle 2002 3.730042 66.947601 168702.312500 126996.860810 ... 96.488899 0.0 0.0 8.160043 3.550284 0.199467 83.795529 9973.432108 4386.695961 3419.400063
3 3 Algeria DZA AFR Lower-middle 2003 3.601041 76.235474 189137.484375 145057.483429 ... 98.706304 0.0 0.0 8.143257 3.725214 0.200820 80.328056 10547.676227 4270.038642 3396.579475
4 4 Algeria DZA AFR Lower-middle 2004 3.544073 93.024330 217928.593750 155499.678178 ... 93.169561 0.0 0.0 9.755374 5.556859 0.194049 100.318061 10847.517657 4182.638819 3337.235958
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4152 4152 Tonga TON WPR Upper-middle 2016 4.818208 191.936722 44.953880 27.868431 ... 0.000000 0.0 0.0 40.786449 4.164367 35.238399 16.521059 6373.839114 6156.507343 2370.548952
4153 4153 Tonga TON WPR Upper-middle 2017 4.855778 212.571030 49.431820 29.589055 ... 0.000000 0.0 0.0 45.379721 4.301449 39.685420 17.064897 6603.159738 6359.166759 2620.507401
4154 4154 Tonga TON WPR Upper-middle 2018 4.806281 219.287933 51.571392 34.362265 ... 0.000000 0.0 0.0 33.277787 4.537564 27.364712 18.001625 6636.249645 6708.234517 2634.708619
4155 4155 Tonga TON WPR Upper-middle 2019 4.746700 229.944107 55.251587 33.663803 ... 0.000000 0.0 0.0 41.151269 4.327241 35.435537 17.167221 6698.871162 6397.297133 2578.259691
4156 4156 Tonga TON WPR Upper-middle 2020 5.317539 248.039581 60.035019 34.282748 ... 0.000000 0.0 0.0 51.679572 4.505345 45.780973 17.873803 6721.856777 6660.602389 2589.909387

2153 rows × 415 columns

In [11]:
# The dataset holds many literal zeros: list the columns in which at least one 0 occurs.
contains_zero = (data_cleaned == 0).any(axis=0)
zeroes = data_cleaned.columns[contains_zero]
data_cleaned.loc[:, zeroes]
Out[11]:
Unnamed: 0 ext vpp_che ext_che ext_pc_usd chi_che shi_che chi_pvt_che vhi_che fs2 ... fs4_ppp2020_pc fs5_ppp2020_pc fs7_ppp2020_pc hf12_ppp2020_pc hf121_ppp2020_pc hf122_ppp2020_pc hf13_ppp2020_pc hf2_ppp2020_pc hf21_ppp2020_pc hf22_ppp2020_pc
0 0 75.081571 0.008181 0.052187 0.032417 26.053186 26.053186 0.0 0.818098 5.000000 ... 0.0 2.689580 0.160144 85.652478 85.652478 0.0 0.0 7.359546 2.689580 0.182809
1 1 75.081571 0.008476 0.046281 0.031165 25.760326 25.760326 0.0 0.847557 5.000000 ... 0.0 3.113848 0.158708 94.641082 94.641082 0.0 0.0 7.720256 3.113848 0.192492
2 2 75.081571 0.009543 0.044505 0.029795 25.936958 25.936958 0.0 0.954344 5.000000 ... 0.0 3.550284 0.154540 96.488899 96.488899 0.0 0.0 8.160043 3.550284 0.199467
3 3 95.000000 0.009808 0.050228 0.038292 25.987232 25.987232 0.0 0.980768 25.000000 ... 0.0 3.725214 0.140574 98.706304 98.706304 0.0 0.0 8.143257 3.725214 0.200820
4 4 102.000000 0.014454 0.046804 0.043539 24.234888 24.234888 0.0 1.445428 31.918429 ... 0.0 5.556859 0.123630 93.169561 93.169561 0.0 0.0 9.755374 5.556859 0.194049
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4152 4152 12.859255 0.013560 28.605438 54.904340 0.000000 0.000000 0.0 1.356008 8.696801 ... 0.0 4.164367 28.436025 0.000000 0.000000 0.0 0.0 40.786449 4.164367 35.238399
4153 4153 15.250778 0.013415 30.852146 65.582725 0.000000 0.000000 0.0 1.341542 10.215766 ... 0.0 4.301449 32.659126 0.000000 0.000000 0.0 0.0 45.379721 4.301449 39.685420
4154 4154 12.143988 0.014226 23.547916 51.637736 0.000000 0.000000 0.0 1.422627 8.917877 ... 0.0 4.537564 19.952731 0.000000 0.000000 0.0 0.0 33.277787 4.537564 27.364712
4155 4155 16.383415 0.013609 29.652388 68.183922 0.000000 0.000000 0.0 1.360873 11.454327 ... 0.0 4.327241 28.367113 0.000000 0.000000 0.0 0.0 41.151269 4.327241 35.435537
4156 4156 20.523394 0.012605 34.185703 84.794081 0.000000 0.000000 0.0 1.260457 14.070116 ... 0.0 4.505345 38.421620 0.000000 0.000000 0.0 0.0 51.679572 4.505345 45.780973

2153 rows × 192 columns

In [12]:
# Count the zeros in each column and discard columns holding more than MAX_ZEROS of them.
MAX_ZEROS = 500
zero_data = (data_cleaned == 0).sum(axis=0)
cols_to_keep = zero_data.index[(zero_data <= MAX_ZEROS).to_numpy()]
data_cleaned = data_cleaned.loc[:, cols_to_keep]
In [13]:
data_cleaned.shape
Out[13]:
(2153, 325)

Filtering data dictionary based on cleaned column names¶

In [14]:
# Load the codebook (data dictionary) that accompanies the dataset.
# NOTE(review): absolute, machine-specific path - this cell only runs where this file exists.
codebook_csv = "/Users/sahithirao/Desktop/Sem3/DataViz/Project/Data set/dei-global-health-expenditure-world_dataset_Codebook-dei-global-health-expenditure-world.csv"
data_dict = pd.read_csv(codebook_csv)
In [15]:
# Restrict the codebook to the variables that are still present after cleaning.
cleaned_column_names = data_cleaned.columns
in_cleaned_data = data_dict['variable code'].isin(cleaned_column_names)
filtered_data_dict = data_dict.loc[in_cleaned_data]

Data Pre-processing¶

In [16]:
# Load the pre-processed dataset used by every chart below.
# NOTE(review): no visible cell writes filtered_data.csv, so it must already exist on disk;
# presumably it is a saved copy of the cleaned frame - TODO confirm and add the export step.
file = r"/Users/sahithirao/Desktop/Sem3/DataViz/Project/Data set/filtered_data.csv"
df = pd.read_csv(filepath_or_buffer=file)
In [17]:
df.head()
Out[17]:
Unnamed: 0.1 Unnamed: 0 country code region income year che_gdp che_pc_usd che ... hf121_ppp2020_pc hf122_ppp2020_pc hf13_ppp2020_pc hf2_ppp2020_pc hf21_ppp2020_pc hf22_ppp2020_pc hf3_ppp2020_pc gdp_ppp2020_pc pfc_ppp2020_pc gge_ppp2020_pc
0 0 0 Algeria DZA AFR Lower-middle 2000 3.489033 62.117695 143870.265625 ... 85.652478 0.0 0.0 7.359546 2.689580 0.182809 84.802063 9422.670850 3917.143779 2692.144010
1 1 1 Algeria DZA AFR Lower-middle 2001 3.837877 67.338501 162230.890625 ... 94.641082 0.0 0.0 7.720256 3.113848 0.192492 79.608176 9572.762464 4184.332806 2991.622448
2 2 2 Algeria DZA AFR Lower-middle 2002 3.730042 66.947601 168702.312500 ... 96.488899 0.0 0.0 8.160043 3.550284 0.199467 83.795529 9973.432108 4386.695961 3419.400063
3 3 3 Algeria DZA AFR Lower-middle 2003 3.601041 76.235474 189137.484375 ... 98.706304 0.0 0.0 8.143257 3.725214 0.200820 80.328056 10547.676227 4270.038642 3396.579475
4 4 4 Algeria DZA AFR Lower-middle 2004 3.544073 93.024330 217928.593750 ... 93.169561 0.0 0.0 9.755374 5.556859 0.194049 100.318061 10847.517657 4182.638819 3337.235958

5 rows × 416 columns

In [18]:
# Give the columns used by the analysis questions readable names (raw code -> label).
column_labels = {
    # government spending on schemes by income level (per capita)
    'hf11_usd_pc': 'Govn_schemes_USD_PC',
    # countries with the highest healthcare expenditure
    'che_pc_usd': 'current healthcare expenditure(USD)',
    # government / private share of total current healthcare expenditure
    'gghed_che': 'Domestic General Government Health Expenditure(GGHE-D) as %(CHE)',
    'pvtd_che': 'Domestic Private Health Expenditure (PVT-D) as %(CHE)',
    # population vs. current healthcare expenditure
    'pop': 'Population',
    # health spending trends
    'che_gdp': 'Current Health Expenditure(GDP)',
}

# A single rename covers all columns; keys that are absent from df are ignored.
df = df.rename(columns=column_labels)

Exploratory Data Analysis¶

In [19]:
df.shape
Out[19]:
(2153, 416)
In [20]:
df.columns
Out[20]:
Index(['Unnamed: 0.1', 'Unnamed: 0', 'country', 'code', 'region', 'income',
       'year', 'Current Health Expenditure(GDP)',
       'current healthcare expenditure(USD)', 'che',
       ...
       'hf121_ppp2020_pc', 'hf122_ppp2020_pc', 'hf13_ppp2020_pc',
       'hf2_ppp2020_pc', 'hf21_ppp2020_pc', 'hf22_ppp2020_pc',
       'hf3_ppp2020_pc', 'gdp_ppp2020_pc', 'pfc_ppp2020_pc', 'gge_ppp2020_pc'],
      dtype='object', length=416)
In [21]:
df.describe()
Out[21]:
Unnamed: 0.1 Unnamed: 0 year Current Health Expenditure(GDP) current healthcare expenditure(USD) che gghed pvtd ext dom_che ... hf121_ppp2020_pc hf122_ppp2020_pc hf13_ppp2020_pc hf2_ppp2020_pc hf21_ppp2020_pc hf22_ppp2020_pc hf3_ppp2020_pc gdp_ppp2020_pc pfc_ppp2020_pc gge_ppp2020_pc
count 2153.000000 2153.000000 2153.000000 2153.000000 2153.000000 2.153000e+03 2.153000e+03 2.153000e+03 2.153000e+03 2153.000000 ... 2153.000000 2153.000000 2153.0 2153.000000 2153.000000 2153.000000 2153.000000 2153.000000 2153.000000 2153.000000
mean 1602.274965 1602.274965 2010.376219 6.045880 624.941548 4.505470e+06 2.178595e+06 2.270971e+06 5.589772e+04 89.032424 ... 171.798315 40.279037 0.0 106.184199 79.298952 20.625910 249.115758 13650.316300 8130.219580 4639.690688
std 1133.148156 1133.148156 5.979335 2.517597 1419.583800 7.697960e+07 3.823974e+07 3.892272e+07 2.318279e+05 15.475841 ... 555.093395 332.038349 0.0 296.066504 258.771992 48.894818 300.911687 16705.256744 8709.085627 6848.013155
min 0.000000 0.000000 2000.000000 1.505008 4.487679 5.842166e-02 2.679574e-02 3.159592e-02 0.000000e+00 18.419533 ... 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.218731 339.582337 264.420668 19.122773
25% 712.000000 712.000000 2005.000000 4.234925 49.006382 2.212096e+03 1.081000e+03 7.693908e+02 9.300000e+00 84.201782 ... 0.000000 0.000000 0.0 11.328357 0.940935 2.254654 46.398863 3107.982261 2227.261391 622.654150
50% 1343.000000 1343.000000 2011.000000 5.417539 166.885742 3.047490e+04 1.070509e+04 1.307649e+04 5.146395e+02 96.306213 ... 1.669358 0.000000 0.0 30.605346 6.117801 6.857729 123.990363 8406.713257 5344.835598 2388.303190
75% 2303.000000 2303.000000 2016.000000 7.486594 465.081573 2.571768e+05 9.786816e+04 1.141192e+05 1.342176e+04 99.641861 ... 60.777451 0.000000 0.0 82.623259 57.792185 19.783227 357.985284 16413.652151 10382.344891 5166.580140
max 4156.000000 4156.000000 2021.000000 20.413412 11702.409180 2.102209e+09 1.132999e+09 9.690000e+08 2.371629e+06 100.907066 ... 5452.208601 3528.416516 0.0 3661.458751 3150.462151 510.854274 2355.271547 125106.034585 70040.684568 55962.805366

8 rows × 412 columns

In [22]:
df.isnull().sum()
Out[22]:
Unnamed: 0.1       0
Unnamed: 0         0
country            0
code               0
region             0
                  ..
hf22_ppp2020_pc    0
hf3_ppp2020_pc     0
gdp_ppp2020_pc     0
pfc_ppp2020_pc     0
gge_ppp2020_pc     0
Length: 416, dtype: int64
In [23]:
df['region'].value_counts()
Out[23]:
AFR     796
AMR     629
EUR     299
EMR     175
WPR     133
SEAR    121
Name: region, dtype: int64
In [24]:
# Region code -> readable region name.
region_mapping = {
    "AFR": "Africa",
    "AMR": "America",
    "EUR": "Europe",
    "EMR": "Eastern Mediterranean",
    "WPR": "Western Pacific",
    "SEAR": "South-East Asia",
}

# replace() leaves any value that is not a key of the mapping unchanged.
df = df.assign(region=df['region'].replace(to_replace=region_mapping))
In [25]:
import pandas as pd
import plotly.express as px

# Correlation heatmap across the renamed indicator columns of df.
columns_of_interest = [
    'Govn_schemes_USD_PC',
    'current healthcare expenditure(USD)',
    'Domestic General Government Health Expenditure(GGHE-D) as %(CHE)',
    'Domestic Private Health Expenditure (PVT-D) as %(CHE)',
    'Population',
    'Current Health Expenditure(GDP)',
]

selected_columns = df.loc[:, columns_of_interest]

# Pairwise correlations (DataFrame.corr defaults); the matrix is square, so the same
# labels are used on both axes.
correlation_matrix = selected_columns.corr()
axis_labels = correlation_matrix.columns

fig = px.imshow(
    correlation_matrix,
    x=axis_labels,
    y=axis_labels,
    color_continuous_scale='Viridis',
)
fig.update_layout(title='Correlation Map for DataFrame', width=1000, height=1000)
fig.show()
In [ ]:
 

Distribution Analysis

1) What is the healthcare expenditure distribution for different categories of financing schemes in different countries of the world? - Tree map
2) How much does the government spend on schemes per capita, by income level? - Dot distribution map

Trend Analysis

3) How have health spending trends changed over the years in different parts of the world? - Line charts time series and heat map
4) What percentage of the total current healthcare expenditure do the government and private sources contribute in the year 2020 for all the countries? - Bubble chart

Relationship Analysis

5) What is the relationship between population and current healthcare expenditure per capita for the USA over the observed years? - Bar chart
6) How has the evolving relationship between purchasing power parity and exchange rates worldwide since the year 2000 impacted global economic dynamics?

Ranking/Comparison Analysis

7) Which are the top 10 countries that exhibit the highest healthcare expenditure in a given year? - Area chart
8) How does health expenditure vary across income groups within different regions, and what changes have occurred from 2010 to 2020? - Sunburst charts

How much does the government spend on schemes per capita, by income level?¶

In [26]:
# Build the frame behind the government-scheme spending map.
scheme_col = 'Govn_schemes_USD_PC'

# Null count for the column; it is not the cell's last expression, so it is not displayed.
df[scheme_col].isnull().sum()

# Keep rows where the spending value is present, then select the columns the map needs
# and round the numeric values to two decimals.
data_clean_usd = df.dropna(subset=[scheme_col])
usd = data_clean_usd.loc[:, ['country', 'year', 'code', 'income', scheme_col]].round(2)
usd.head()
Out[26]:
country year code income Govn_schemes_USD_PC
0 Algeria 2000 DZA Lower-middle 28.52
1 Algeria 2001 DZA Lower-middle 33.99
2 Algeria 2002 DZA Lower-middle 33.04
3 Algeria 2003 DZA Lower-middle 38.67
4 Algeria 2004 DZA Lower-middle 43.85
In [27]:
# Animated dot map: one marker per country, sized by government-scheme spending per capita
# and coloured by income group; animation_frame adds a slider over the 'year' column.
fig = px.scatter_geo(
    usd,
    locations='code',
    size='Govn_schemes_USD_PC',
    color='income',
    hover_name='country',
    hover_data=['Govn_schemes_USD_PC'],
    title='Government Spending on Schemes in USD Per Capita by Country and Income Group',
    projection='natural earth',
    animation_frame='year',
    size_max=30,
    color_discrete_sequence=px.colors.qualitative.Plotly,
)

# Marker styling in one pass: enforce a minimum marker size and remove the marker outline.
fig.update_traces(marker=dict(sizemin=3, line=dict(width=0)))

# Hide the map frame and the coastlines.
fig.update_layout(geo_showframe=False, geo_showcoastlines=False)
fig.show()

Which are the top 10 countries that exhibit the highest healthcare expenditure in a given year?¶

In [28]:
df['current healthcare expenditure(USD)'].isnull().sum()
Out[28]:
0
In [29]:
# Independent copy of the columns needed for the top-10 expenditure chart.
df_che = df.loc[:, ['country', 'year', 'income', 'current healthcare expenditure(USD)']].copy()
In [30]:
# Round the numeric columns to two decimal places.
df_che = df_che.round(decimals=2)
In [31]:
# Discard rows that have no expenditure value.
df_che = df_che.dropna(subset=['current healthcare expenditure(USD)'])
In [32]:
# Top-10 countries by current healthcare expenditure for one year, drawn as a line with
# red point markers. The year is meant to become a user choice in the Flask app.
selected_year = 2020
value_col = 'current healthcare expenditure(USD)'

df_selected = df_che.loc[df_che['year'] == selected_year]
df_selected_top10 = df_selected.nlargest(10, value_col)

fig_bar = px.line(
    df_selected_top10,
    x='country',
    y=value_col,
    title='Top 10 countries with current healthcare expenditure per capita(in USD)',
)
fig_bar.update_layout(
    xaxis_tickangle=45,
    xaxis_title='Country',
    yaxis_title='healthcare expenditure per capita',
    xaxis_categoryorder='total descending',
)

# Overlay the red markers; add_trace returns the figure, which the cell then displays.
red_markers = px.scatter(
    df_selected_top10,
    x='country',
    y=value_col,
    color_discrete_sequence=['red'],
).data[0]
fig_bar.add_trace(red_markers)

What percentage of the total current healthcare expenditure do the government and private sources contribute in the year 2020 for all the countries?¶

In [33]:
# Bubble chart for the year 2020: government share (x) against private share (y) of
# current health expenditure, with bubbles sized by 'current healthcare expenditure(USD)'.
# Fix: the filter previously selected 2010 although the question, this comment and the
# variable name all refer to 2020.
BUBBLE_CHART_YEAR = 2020
data_for_2020 = df[df['year'] == BUBBLE_CHART_YEAR]
fig6 = px.scatter(data_for_2020,
                  x='Domestic General Government Health Expenditure(GGHE-D) as %(CHE)',
                  y='Domestic Private Health Expenditure (PVT-D) as %(CHE)',
                  size='current healthcare expenditure(USD)',
                  color='Domestic General Government Health Expenditure(GGHE-D) as %(CHE)',
                  hover_name='country',
                  labels={'Domestic General Government Health Expenditure(GGHE-D) as %(CHE)': 'Government Health Expenditure %', 'Domestic Private Health Expenditure (PVT-D) as %(CHE)': 'Private Health Expenditure %'},
                  title='Healthcare Financing by Government and Private Sources',
                  size_max=60)

# update_layout returns the figure, so leaving it as the last expression displays the chart.
fig6.update_layout(xaxis_title='Government Health Expenditure %',
                   yaxis_title='Private Health Expenditure %',
                   coloraxis_colorbar=dict(title='Government Health Expenditure%'))

What is the relationship between population and current healthcare expenditure per capita for the USA over the observed years?¶

In [34]:
# Grouped bar chart for the USA: population and current healthcare expenditure per year.
# Fix: corrected the misspelled chart title ("Realtionship" -> "Relationship").
# NOTE(review): both series share one y-axis although they are different quantities;
# consider a secondary axis if one series dwarfs the other - TODO confirm with the output.
df_usa = df[df['country'] == 'United States of America']

fig_n = px.bar(df_usa, x='year', y=['Population', 'current healthcare expenditure(USD)'],
               barmode='group',
               title='Relationship between Population(in Thousands) and Current Healthcare Expenditure per Capita',
               labels={'value': 'Amount'},
               height=500,
               color_discrete_map={'Population': '#003366', 'current healthcare expenditure(USD)': '#FFA500'})

fig_n.show()

How have health spending trends changed over the years in different parts of the world?¶

In [35]:
health_data = pd.DataFrame(df)

# Group by year and region and average every numeric column (including CHE as % GDP).
# numeric_only=True is needed because the frame also holds text columns (country, code,
# income): pandas >= 2.0 raises a TypeError when asked for their mean, while older
# versions silently dropped them with a FutureWarning. The result is the same numeric table.
avg_che_gdp_data = (
    health_data
    .groupby(['year', 'region'])
    .mean(numeric_only=True)
    .reset_index()
)
In [36]:
def _three_row_mean(series):
    """Trailing mean over up to three consecutive rows (fewer at the start of a series)."""
    return series.rolling(window=3, min_periods=1).mean()


# Smooth each region's yearly average separately; transform keeps the result aligned
# with the rows of avg_che_gdp_data.
regional_che = avg_che_gdp_data.groupby('region')['Current Health Expenditure(GDP)']
avg_che_gdp_data['che_gdp_smoothed'] = regional_che.transform(_three_row_mean)

# Time-series chart: one line per region.
fig = px.line(
    avg_che_gdp_data,
    x='year',
    y='che_gdp_smoothed',
    color='region',
    line_shape='linear',
    title='Smoothed Current Health Expenditure as % GDP Over Time for Each Region',
)
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Smoothed Current Health Expenditure as % of GDP',
    legend_title='Region',
)
fig.show()
In [37]:
#HEATMAP
# Build the region x year matrix once and take the axis labels from the matrix itself.
# Previously the labels came from unique() on the long frame, which is only guaranteed to
# match the pivot's row/column order (and count) by coincidence; a mismatch would label
# the wrong rows or fail outright.
che_gdp_by_region_year = avg_che_gdp_data.pivot_table(
    index='region',
    columns='year',
    values='Current Health Expenditure(GDP)',
)

fig = px.imshow(che_gdp_by_region_year,
                x=list(che_gdp_by_region_year.columns),
                y=list(che_gdp_by_region_year.index),
                title='Heatmap of Current Health Expenditure as % GDP by Region and Year',
                color_continuous_scale='YlGnBu')

fig.show()

How does health expenditure vary across income groups within different regions, and what changes have occurred from 2010 to 2020?¶

In [38]:
# Fixed hex colour per region name, passed as color_discrete_map to the sunburst charts
# so a region has the same colour in every chart.
region_colors = {
    "Africa": "#EC7063",
    "America": "#AF7AC5",
    "Eastern Mediterranean": "#48C9B0",
    "Europe": "#7FB3D5",
    "South-East Asia": "#F9E79F",
    "Western Pacific": "#F5B7B1",
}
In [39]:
# Sunburst for 2010: region -> income group -> country, with wedges sized by
# 'Current Health Expenditure(GDP)' and coloured by region.
df_10 = df.loc[df['year'].eq(2010)]

fig = px.sunburst(
    df_10,
    path=['region', 'income', 'country'],
    values='Current Health Expenditure(GDP)',
    color='region',
    color_discrete_map=region_colors,
    title='2010 Health Expenditure as % of GDP Across Income Groups (Sunburst Chart)',
)
fig.show()
/Users/sahithirao/opt/anaconda3/lib/python3.9/site-packages/plotly/express/_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/Users/sahithirao/opt/anaconda3/lib/python3.9/site-packages/plotly/express/_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/Users/sahithirao/opt/anaconda3/lib/python3.9/site-packages/plotly/express/_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

In [40]:
# Sunburst for 2020: region -> income group -> country, with wedges sized by
# 'Current Health Expenditure(GDP)' and coloured by region.
df_20 = df.loc[df['year'].eq(2020)]

fig = px.sunburst(
    df_20,
    path=['region', 'income', 'country'],
    values='Current Health Expenditure(GDP)',
    color='region',
    color_discrete_map=region_colors,
    title='2020 Health Expenditure as % of GDP Across Income Groups (Sunburst Chart)',
)
fig.show()
/Users/sahithirao/opt/anaconda3/lib/python3.9/site-packages/plotly/express/_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/Users/sahithirao/opt/anaconda3/lib/python3.9/site-packages/plotly/express/_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/Users/sahithirao/opt/anaconda3/lib/python3.9/site-packages/plotly/express/_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

What is the healthcare expenditure distribution for different categories of financing schemes in different countries of the world?¶

In [41]:
# Country plus the financing-scheme expenditure columns used by the tree map.
selected_columns = [
    'country',
    'hf_usd',
    'hf1_usd',
    'hf11_usd',
    'hf2_usd',
    'hf21_usd',
    'hf22_usd',
    'hf3_usd',
]

# Health-expenditure frame restricted to those columns.
data_health_exp = df.loc[:, selected_columns]
In [42]:
data_health_exp.isna().sum()
Out[42]:
country     0
hf_usd      0
hf1_usd     0
hf11_usd    0
hf2_usd     0
hf21_usd    0
hf22_usd    0
hf3_usd     0
dtype: int64
In [43]:
# Show the financing columns in which at least one 0 occurs.
has_zero_value = (data_health_exp == 0).any(axis=0)
zeroes = data_health_exp.columns[has_zero_value]
data_health_exp.loc[:, zeroes]
Out[43]:
hf2_usd hf21_usd hf22_usd
0 42.793716 15.639162 1.062985
1 44.150452 17.807416 1.100822
2 46.440352 20.205341 1.135207
3 52.393583 23.967965 1.292074
4 76.740908 43.713175 1.526492
... ... ... ...
2148 2.694593 0.275123 2.328056
2149 3.171449 0.300615 2.773491
2150 2.405740 0.328033 1.978268
2151 3.123177 0.328416 2.689382
2152 3.774666 0.329070 3.343834

2153 rows × 3 columns

In [44]:
# Reshape to long format for Plotly Express: one row per country / financing column value.
melted_data = data_health_exp.melt(
    id_vars='country',
    var_name='financing_source',
    value_name='expenditure',
)

# Force the values to numeric; anything that cannot be parsed becomes NaN.
melted_data = melted_data.assign(
    expenditure=pd.to_numeric(melted_data['expenditure'], errors='coerce')
)

# Keep strictly positive expenditures (the comparison is False for NaN, so those rows go too).
is_positive = melted_data['expenditure'].gt(0)
melted_data = melted_data.loc[is_positive]
In [45]:
# Display label for each financing-scheme column code, used to relabel the tree map.
financing_source_labels = {
    "hf_usd": "Total Health Expenditure",
    "hf1_usd": "Government Schemes & Compulsory Contributions",
    "hf11_usd": "Government Schemes",
    "hf2_usd": "Voluntary Health Payment Schemes",
    "hf21_usd": "Voluntary Health Insurance Schemes",
    "hf22_usd": "NPISH Financing Schemes (including development agencies)",
    "hf3_usd": "Household Out-of-Pocket Payment",
}
In [46]:
# Swap the column codes for readable labels.
# Fix: uses replace() instead of map(). map() turns every value that is not a key into NaN,
# so re-running this cell (when the codes are already labels) blanked the whole column and
# the dropna() below then emptied the chart. replace() leaves already-labelled values
# untouched, and on the first run it gives the same result because every code is a key.
melted_data['financing_source'] = melted_data['financing_source'].replace(financing_source_labels)

# Create a TreeMap using Plotly Express: country -> financing source, sized and coloured
# by expenditure.
fig = px.treemap(melted_data.dropna(),  # Drop rows with NaN values
                 path=['country', 'financing_source'],
                 values='expenditure',
                 title='Healthcare Expenditure TreeMap',
                 color='expenditure',
                 color_continuous_scale='Viridis',
                 color_continuous_midpoint=melted_data['expenditure'].mean())  # Set midpoint to mean value

# Show the plot
fig.show()
/Users/sahithirao/opt/anaconda3/lib/python3.9/site-packages/plotly/express/_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/Users/sahithirao/opt/anaconda3/lib/python3.9/site-packages/plotly/express/_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

How has the evolving relationship between purchasing power parity, exchange rates, and out-of-pocket expenses worldwide since the year 2000 impacted global economic dynamics?¶

In [47]:
import plotly.graph_objects as go
import plotly.offline as pyo

spider_data = ['year', 'ppp', 'xrt', 'oops_che', 'region']

# Readable names for the indicator codes.
spider_labels = {
    'ppp': 'purchasing power parity',
    'xrt': 'exchange rates',
    'oops_che': 'out-of-pocket expenses',
}

# Select the columns from the raw `data` frame and rename them; rename() returns a new
# frame, so `data` itself is left unchanged.
data_spider_chart = data[spider_data].rename(columns=spider_labels)

def get_yearwise_value(df, column_name):
    """Sum one column separately for each distinct year.

    Parameters:
        df: DataFrame that has a 'year' column and a column named `column_name`.
        column_name: name of the column to total.

    Returns:
        (years, yearwise_sum): the distinct values of df['year'] in order of first
        appearance, and a list with the sum of `column_name` for each of those years,
        in the same order.
    """
    years = df['year'].unique()
    yearwise_sum = [df.loc[df['year'] == current_year, column_name].sum() for current_year in years]
    return years, yearwise_sum

# Yearly totals for the two plotted series. Only purchasing power parity and
# out-of-pocket expenses are drawn; the 'exchange rates' column is not used here.
years, yearwise_ppp = get_yearwise_value(data_spider_chart, 'purchasing power parity')
years, yearwise_oops_che = get_yearwise_value(data_spider_chart, 'out-of-pocket expenses')

# Repeat the first point at the end of every sequence so each polar trace closes its loop.
years = list(years) + [years[0]]
yearwise_ppp = list(yearwise_ppp) + [yearwise_ppp[0]]
yearwise_oops_che = list(yearwise_oops_che) + [yearwise_oops_che[0]]

# Angular positions are the years rendered as text labels, shared by both traces.
theta_labels = [str(year) for year in years]

ppp_trace = go.Scatterpolar(r=yearwise_ppp, theta=theta_labels, name='purchasing power parity')
oops_trace = go.Scatterpolar(r=yearwise_oops_che, theta=theta_labels, name='out-of-pocket expenses')

fig = go.Figure(
    data=[ppp_trace, oops_trace],
    layout=go.Layout(
        title=go.layout.Title(text='Purchasing Power Parity and Out-of-pocket Expenses'),
        polar={'radialaxis': {'visible': True}},
        showlegend=True,
    ),
)
fig.show()